%%VUOLTEENAHO.m
% This program creates VUOLTEENAHO.csv, which will be used in Stata for
% return decomposition tests

% This program is part of the data and code used in:
% Kenneth R. Ahern, "Do Common Stocks Have Perfect Substitutes? 
% Product Market Competition and the Elasticity of Demand for Stocks," 
% The Review of Economics and Statistics, October 2014, 96(4):756-766.

%Vuolteenaho Return Decomposition
clear all
%Compustat data
fid=fopen(['Vuolteenaho Compustat 1950 1990 Raw.csv'])
VA=textscan(fid,'%f %f %f %f %*q %*q %*q %*q %*q %f %f %f %f %f %f %f %f %*q %f %f','HeaderLines',1,'Delimiter',',');
fclose(fid)
for i=1:14;
    VAM(:,i)=VA{i};
end

%CRSP Data
fid=fopen(['Vuolteenaho CRSP 1950 1990 Coded.csv'])
VP=textscan(fid,'%f %f %f %f %f %f','Delimiter',',');
fclose(fid)
for i=1:6;
    VPM(:,i)=VP{i};
end

%1 DATE
%2 PERMNO
%3 DLRET
%4 SHROUT
%5 PRC
%6 RET

	%   item
%CEQ	60
%CEQL	235
%TXDITC	35
%TXP	71
%NI     172
%DLC	34
%DLTT	9
%PSTK	130

%1 gvkey	
%2 lpermno
%3 datadate
%4 fyear
%5 CEQ
%6 CEQL
%7 DLC
%8 DLTT
%9 NI
%10 PSTK
%11 TXDITC
%12 TXP
%13 SIC
%14 SICH

clear VP VA
%Build sample with filters
%Only keep end of year FYR=12 obs
FYRID=find(floor(VAM(:,3)/100)-100*floor(VAM(:,3)/10000)~=12);
VAM(FYRID,:)=[];

%Compute book equity
BE=NaN*ones(length(VAM(:,1)),1);
for i=1:length(VAM);
    BE(i,1)=VAM(i,5);
    if isnan(BE(i,1))==1;
        BE(i,1)=VAM(i,6);
    end
    if BE(i,1)~=0|isnan(BE(i,1))~=1
        BE(i,1)=nansum([BE(i,1),VAM(i,11),VAM(i,12)],2);
    end
    if BE(i,1)==0
        BE(i,1)=NaN;
    end
end

%Compute GAAP ROE
ROE=NaN*ones(length(VAM(:,1)),1);
for i=2:length(VAM);
    if VAM(i,1)==VAM(i-1,1) & isnan(BE(i-1))==0
        ROE(i,1)=VAM(i,9)/BE(i-1);
    end
    if ROE(i,1)<=-1
        ROE(i,1)=NaN;
    end
end

%Compute RETs and ME
RET=NaN*ones(length(VAM(:,1)),1);
ME=NaN*ones(length(VAM(:,1)),1);
for i=1:length(VAM)
    clear ID
    if mod(i,100)==0
        clc
        disp([num2str(i)])
    end
    ID=find((VPM(:,2)==VAM(i,2))&(VPM(:,1)<=VAM(i,4)*10000+0531)&(VPM(:,1)>(VAM(i,4)-1)*10000+0531)); %find Permnos in CRSP to match Compustat
    if isempty(ID)==0;
        RET(i,1)=prod(1+VPM(ID,6))-1;
        ME(i,1)=abs(VPM(ID(end),4)*(VPM(ID(end),5)));
    end
end
ME(ME<=1000)=NaN; % drop if less than $10 mill
BM=BE./(ME/1000);
BM(BM>100)=NaN;
BM(BM<1/100)=NaN;

%Impose lagged filters
SMF=NaN*ones(length(VAM(:,1)),1);
    BEID1=NaN*ones(length(VAM(:,1)),1);
    BEID2=NaN*ones(length(VAM(:,1)),1);
    BEID3=NaN*ones(length(VAM(:,1)),1);
    MEID1=NaN*ones(length(VAM(:,1)),1);
    MEID2=NaN*ones(length(VAM(:,1)),1);
    MEID3=NaN*ones(length(VAM(:,1)),1);
    NIID1=NaN*ones(length(VAM(:,1)),1);
    NIID2=NaN*ones(length(VAM(:,1)),1);
    RETID1=NaN*ones(length(VAM(:,1)),1);
    RETID2=NaN*ones(length(VAM(:,1)),1);
    RETID3=NaN*ones(length(VAM(:,1)),1);
    RETID4=NaN*ones(length(VAM(:,1)),1);
    RETID5=NaN*ones(length(VAM(:,1)),1);

for i=6:length(VAM(:,1))
    if mod(i,100)==0
        clc
        disp([num2str(i)])
    end
    ID1=find((VAM(:,2)==VAM(i,2))&(VAM(:,4)==VAM(i,4)-1),1,'first');
    ID2=find((VAM(:,2)==VAM(i,2))&(VAM(:,4)==VAM(i,4)-2),1,'first');
    ID3=find((VAM(:,2)==VAM(i,2))&(VAM(:,4)==VAM(i,4)-3),1,'first');
    ID4=find((VAM(:,2)==VAM(i,2))&(VAM(:,4)==VAM(i,4)-4),1,'first');
    ID5=find((VAM(:,2)==VAM(i,2))&(VAM(:,4)==VAM(i,4)-5),1,'first');

    
    if isempty(ID1)==0
    BEID1(i,1)=isfinite(BE(ID1));
    MEID1(i,1)=isfinite(ME(ID1));
    NIID1(i,1)=isfinite(VAM(ID1,9));
    RETID1(i,1)=isfinite(RET(ID1));
    end
    if isempty(ID2)==0
    BEID2(i,1)=isfinite(BE(ID2));
    MEID2(i,1)=isfinite(BE(ID2));
    NIID2(i,1)=isfinite(VAM(ID2,9));
    RETID2(i,1)=isfinite(RET(ID2));
    end
    if isempty(ID3)==0
    BEID3(i,1)=isfinite(BE(ID3));
    MEID3(i,1)=isfinite(ME(ID3));
    RETID3(i,1)=isfinite(RET(ID3));
    end
    if isempty(ID4)==0
    RETID4(i,1)=isfinite(RET(ID4));
    end
    if isempty(ID5)==0
    RETID5(i,1)=isfinite(RET(ID5));
    end
    if sum([BEID1(i,1) BEID2(i,1) BEID3(i,1) MEID1(i,1) MEID2(i,1) MEID3(i,1) NIID1(i,1) NIID2(i,1) RETID1(i,1) RETID2(i,1) RETID3(i,1) RETID4(i,1) RETID5(i,1)])==13
        SMF(i,1)=1;
    end
end
SMFI=find(SMF==1);

VAM=VAM(SMFI,:)
ROE=ROE(SMFI);
BM=BM(SMFI);
BE=BE(SMFI);
ME=ME(SMFI);
RET=RET(SMFI);

ROE_MA=NaN*ones(length(VAM(:,1)),1);
BM_MA=NaN*ones(length(VAM(:,1)),1);
RET_MA=NaN*ones(length(VAM(:,1)),1);

for i=1:length(VAM)
    if mod(i,100)==0
        clc
        disp([num2str(i) ' of ' num2str(length(VAM))])
    end
    ID=find(VAM(:,4)==VAM(i,4));
    ROE_MA(i,1)=log(1+ROE(i,1))-nanmean(log(1+ROE(ID,1)));
    BM_MA(i,1)=log(1+BM(i,1))-nanmean(log(1+BM(ID,1)));
    RET_MA(i,1)=log(1+RET(i,1))-nanmean(log(1+RET(ID,1)));
end

REGMAT=NaN*ones(length(VAM(:,1)),6);
REGMAT(2:end,1:3)=[RET_MA(2:end) BM_MA(2:end) ROE_MA(2:end)];
for i=2:length(VAM(:,1));
    NOBS0=sum(VAM(:,4)==VAM(i,4));
    NOBS1=sum(VAM(:,4)==VAM(i,4)-1);
    REGMAT(i,1:3)=REGMAT(i,1:3)./NOBS0;
    ID=find((VAM(i,2)==VAM(:,2))&(VAM(i,4)-1==VAM(:,4)),1,'first');
    if isempty(ID)==0
        REGMAT(i,4)=RET_MA(ID)/NOBS1;
        REGMAT(i,5)=BM_MA(ID)/NOBS1;
        REGMAT(i,6)=ROE_MA(ID)/NOBS1;
    end
end

MISS=sum(isnan(REGMAT),2);
NMI=find(MISS==0);

REGRET=ols(REGMAT(NMI,1),REGMAT(NMI,4:6));
REGBM=ols(REGMAT(NMI,2),REGMAT(NMI,4:6));
REGROE=ols(REGMAT(NMI,3),REGMAT(NMI,4:6));
GAMMA=[REGRET.beta REGBM.beta REGROE.beta];

RESRET=REGRET.resid;
RESBM=REGBM.resid;
RESROE=REGROE.resid;

RHO=.967; %By assumption
LAMBDAP=[1 0 0]*RHO*GAMMA*inv(eye(3)-RHO*GAMMA); %1 x 3 vector

%Expected-return news can then be conveniently expressed as
NR=[LAMBDAP*[RESRET';RESBM';RESROE']]'; 

%Cash-flow news as (el' + A')u , t.
NCF=[([1 0 0]+LAMBDAP)*[RESRET';RESBM';RESROE']]';

%Make matrices align by year with years on rows and permnos on columns
VAMNMI=VAM(NMI,:);%same index as final sample
YEARS=unique(VAM(NMI,4));
YEARS(isnan(YEARS))=[];
PERMNOS=unique(VAM(NMI,2));
PERMNOS(isnan(PERMNOS))=[];
for p=1:length(PERMNOS)
    ID=find(PERMNOS(p)==VAMNMI(:,2),1,'last');
    if isempty(ID)==0
        SICS4(p,1)=VAMNMI(ID,14);
    else
        SICS4(p,1)=NaN;
    end
end
SICS3=floor(SICS4/10);
SICS2=floor(SICS4/100);
NRMAT=NaN*ones(length(YEARS),length(PERMNOS));
NCFMAT=NaN*ones(length(YEARS),length(PERMNOS));
RETMAT=NaN*ones(length(YEARS),length(PERMNOS));
for i=1:length(PERMNOS)
    if mod(i,100)==0
        clc
        disp ([num2str(i) ' of ' num2str(length(PERMNOS))])
    end
    for y=1:length(YEARS)
        ID=find((VAMNMI(:,2)==PERMNOS(i))&(VAMNMI(:,4)==YEARS(y)),1,'first');%find match permno and year
        if isempty(ID)==0;
            NRMAT(y,i)=NR(ID); %Exp return news
            NCFMAT(y,i)=NCF(ID); %CF news
        end
    end
end

%Load FF Factors
fid=fopen(['FFMONTHLY.csv'])
FF=textscan(fid,'%f %f %f %f %f %f','Delimiter',',');
fclose(fid)
for i=1:6;
    FFM(:,i)=FF{i};
end
%1 Date
%2 MRT-RF
%3 SMB
%4 HML
%5 RF
%6 MOM

MRF=NaN*ones(length(YEARS),1);
SMB=NaN*ones(length(YEARS),1);
HML=NaN*ones(length(YEARS),1);
RFF=NaN*ones(length(YEARS),1);
MOM=NaN*ones(length(YEARS),1);

for y=1:length(YEARS)
    clear ID
    ID=find((FFM(:,1)<=YEARS(y)*100+06)&(FFM(:,1)>(YEARS(y)-1)*100+05)); %find Permnos in CRSP to match Compustat
    if isempty(ID)==0;
        MRF(y,1)=prod(1+(FFM(ID,2)/100))-1;
        SMB(y,1)=prod(1+(FFM(ID,3)/100))-1;
        HML(y,1)=prod(1+(FFM(ID,4)/100))-1;
        RFF(y,1)=prod(1+(FFM(ID,5)/100))-1;
        MOM(y,1)=prod(1+(FFM(ID,6)/100))-1;
    end
end
MRK=MRF+RFF;


%%NR CORRELATIONS
NRFF3FCOIIND2=NaN*ones(length(PERMNOS),1);
for f=1:length(PERMNOS)
    clc
    disp([num2str(f) ' of ' num2str(length(PERMNOS))])
    clear ID1;
    ID1=find((SICS2(f)==SICS2));
    if length(ID1)>5;
        NRMATA=NRMAT(:,f);
        NRMATT=nanmean(NRMAT(:,ID1(ID1~=f)),2);
        REGDATA=[NRMATA ones(35,1) MRK SMB HML MOM NRMATT ]; %SAMMAT is the (250,8) [NRA 1 NRT] matrix of data for analysis
        REGDATA=REGDATA(isnan(REGDATA(:,1))==0,:);
        if length(REGDATA(:,1))>=10 %need at least 10 years
            FF3F=ols(REGDATA(:,1),REGDATA(:,[2:5 7])); %OLS Coefficients with Equal Weighted Index + Tar
            NRFF3FCOIIND2(f,1)=FF3F.beta(5); %industry coef
        end
    end
end


%%NCF CORRELATIONS
NCFFF3FCOIIND2=NaN*ones(length(PERMNOS),1);
for f=1:length(PERMNOS)
    clc
    disp([num2str(f) ' of ' num2str(length(PERMNOS))])
    clear ID1;
    ID1=find((SICS2(f)==SICS2));
    if length(ID1)>5;
        NCFMATA=NCFMAT(:,f);
        NCFMATT=nanmean(NCFMAT(:,ID1(ID1~=f)),2);
        REGDATA=[NCFMATA ones(35,1) MRK SMB HML MOM NCFMATT ]; %SAMMAT is the (250,8) [NCFA 1 NCFT] matrix of data for analysis
        REGDATA=REGDATA(isnan(REGDATA(:,1))==0,:);
        if length(REGDATA(:,1))>=10 %need at least 10 years
            FF3F=ols(REGDATA(:,1),REGDATA(:,[2:5 7])); %OLS Coefficients with Equal Weighted Index + Tar
            NCFFF3FCOIIND2(f,1)=FF3F.beta(5); %industry coef
        end
    end
end
%These are coefficients per firm.  Need to aggregate to industry level and
%match prod market elasticity to SIC2 digit level.



%Industry markup data
IMA=xlsread('IndustryMarkupAll.xls');

%Get median per 2-digit SIC code
INDU=unique(SICS2); %unique 2-digit SICs
INDCOM=NaN(length(INDU),3); %Placeholder
for i=1:length(INDU);
    ID=find(IMA(:,1)==INDU(i));
    if isempty(ID)==0;
        INDCOM(i,1)=IMA(ID,18); % record prod mkt elas
        ID1=find(SICS2==INDU(i)); %all firms in SIC
        if isempty(ID1)==0
            INDCOM(i,2:3)=nanmedian([NCFFF3FCOIIND2(ID1) NRFF3FCOIIND2(ID1)]);
        end
    end
end

% Save a matlab file
save(['VUOLTEENAHO.mat'],'INDCOM');

% Save a csv file for Stata
STATA_HD={'elas_prod',...
'comove_cashflow',...
'comove_discountrate',...
};

cell2csv('VUOLTEENAHO.csv',STATA_HD,',');
dlmwrite('VUOLTEENAHO.csv',INDCOM,'delimiter',',','precision','%1.9f','-append');
